James Cole - December 2019. Built with R version 3.5.2
This is Notebook contains the final brain age analysis of MS patient data and controls from the UCL cohort, the MAGNIMS consortium and the Imperial College London PET study (n=25). The analysis uses brain-predicted age difference (brain-PAD) to look at brain ageing in the context of MS. The brain-PAD values were generated in PRONTO, using an independent healthy (n=2001) training dataset, and the values were corrected for proportional bias using the intercept and slope of the age by brain-predicted age regression in the training dataset.
Set-up
Clear workspace, set colour palette
rm(list = ls()) ## clear workspace
ms.palette <- c("darkgreen", "darkorange", "red", "blue", "purple") # define MS colour scheme for groups
sessionInfo()
## R version 3.5.2 (2018-12-20)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS 10.15.4
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_GB.UTF-8/en_GB.UTF-8/en_GB.UTF-8/C/en_GB.UTF-8/en_GB.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] tidyr_1.0.0 survminer_0.4.3 ggpubr_0.2
## [4] magrittr_1.5 survival_2.43-3 stringr_1.3.1
## [7] sjPlot_2.8.2 sjmisc_2.8.2 scales_1.0.0
## [10] RColorBrewer_1.1-2 qwraps2_0.4.1 psych_1.8.12
## [13] pryr_0.1.4 ppcor_1.1 plyr_1.8.4
## [16] plotrix_3.7-4 metafor_2.0-0 MASS_7.3-51.1
## [19] lmerTest_3.0-1 lme4_1.1-21 Matrix_1.2-15
## [22] lm.beta_1.5-1 knitr_1.21 kableExtra_1.1.0
## [25] jtools_1.1.1 hier.part_1.0-4 gtools_3.8.1
## [28] gridExtra_2.3 glmmTMB_0.2.3 ggplot2_3.2.1
## [31] ggstance_0.3.1 emmeans_1.4.2 effects_4.1-4
## [34] dplyr_0.8.3 data.table_1.11.8 cowplot_1.0.0
## [37] corrplot_0.84 car_3.0-2 carData_3.0-2
##
## loaded via a namespace (and not attached):
## [1] minqa_1.2.4 colorspace_1.3-2 rio_0.5.16
## [4] sjlabelled_1.1.2 estimability_1.3 parameters_0.4.1
## [7] rstudioapi_0.10 mvtnorm_1.0-8 xml2_1.2.0
## [10] codetools_0.2-16 splines_3.5.2 mnormt_1.5-5
## [13] zeallot_0.1.0 nloptr_1.2.1 ggeffects_0.14.0
## [16] km.ci_0.5-2 broom_0.5.2 effectsize_0.1.1
## [19] readr_1.3.1 compiler_3.5.2 httr_1.4.0
## [22] sjstats_0.17.8 backports_1.1.3 assertthat_0.2.0
## [25] lazyeval_0.2.1 survey_3.36 cli_1.0.1
## [28] htmltools_0.3.6 tools_3.5.2 coda_0.19-3
## [31] gtable_0.2.0 glue_1.3.1 Rcpp_1.0.2
## [34] cellranger_1.1.0 vctrs_0.2.0 nlme_3.1-137
## [37] insight_0.8.0 xfun_0.4 openxlsx_4.1.0
## [40] rvest_0.3.3 lifecycle_0.1.0 zoo_1.8-4
## [43] hms_0.4.2 parallel_3.5.2 TMB_1.7.15
## [46] yaml_2.2.0 curl_3.2 KMsurv_0.1-5
## [49] stringi_1.2.4 bayestestR_0.5.1 boot_1.3-21
## [52] zip_1.0.0 rlang_0.4.0 pkgconfig_2.0.2
## [55] evaluate_0.12 lattice_0.20-38 purrr_0.3.2
## [58] cmprsk_2.2-7 tidyselect_0.2.5 R6_2.3.0
## [61] generics_0.0.2 DBI_1.0.0 pillar_1.3.1
## [64] haven_2.0.0 foreign_0.8-71 withr_2.1.2
## [67] abind_1.4-5 nnet_7.3-12 tibble_2.1.3
## [70] performance_0.4.3 modelr_0.1.4 crayon_1.3.4
## [73] survMisc_0.5.5 rmarkdown_1.11 grid_3.5.2
## [76] readxl_1.2.0 forcats_0.3.0 digest_0.6.18
## [79] webshot_0.5.1 xtable_1.8-3 numDeriv_2016.8-1
## [82] munsell_0.5.0 viridisLite_0.3.0 mitools_2.4
Get data from CSV and define longitudinal data.frames
df <- read.csv("MS_brain_age_final_long.csv")
df$subtype <- factor(df$subtype, levels = c("control", "CIS", "RRMS", "SPMS", "PPMS")) # reorder subtype factor to put controls first
df$Cohort <- recode(df$Cohort, JR1 = "Imperial", C0 = "UCL0", C1 = "UCL1", C2 = "UCL2", C3 = "UCL3", C4 = "UCL4" ,C5 = "UCL5", C6 = "UCL6", C7 = "UCL7", A = "Amsterdam", B = "Barcelona", G = "Graz", M = "Milan", R = "Rome", S = "Siena")
df$wb_vol_ratio_icv <- df$WBV / df$ICV
Get data on Brain Age Healthy Training dataset
banc <- read.csv("/Users/jcole/Work/Brain ageing/BANC_2015/BANC_N2003_age_sex_Brain_age_predictions.csv")
Exclude participants with errors in the database & correct time since diagnosis errors
tmp <- df[df$Cohort == 'Amsterdam',] %>% group_by(PatientID) %>% dplyr::summarize(sd = sd(age_at_scan)) %>% arrange(desc(sd)) %>% filter(sd > 2)
excluded_IDs <- sort(tmp$PatientID)
# excluded_IDs <- unique(df[which(df$age_at_scan < df$age_at_baseline_scan1),1])
excluded_scans <- (df %>% filter(str_detect(PatientID, paste(excluded_IDs, collapse = "|"))))["ScanName"]
df <- df %>%
filter(!str_detect(PatientID, paste(excluded_IDs, collapse = "|")))
rm(tmp)
There were 13 subjects with 38 scans excluded in total. Data entry errors in original spreadsheet; age at baseline not consistent within subject.
Load data for lesion filling analysis
## read in data
lesion_df <- read.csv(file = '~/Work/Brain ageing/Collaborations/MS/MAGNIMS/MAGNIMS20171224_final.csv')
lesion_df$subtype <- factor(lesion_df$subtype, levels = c("control", "CIS", "RRMS", "SPMS", "PPMS"))
## create ICV ratio variables
lesion_df <- lesion_df %>%
mutate(ICV = GMV + WMV + CSFV) %>%
mutate(gm_icv_ratio = GMV/ICV) %>%
mutate(wm_icv_ratio = WMV/ICV)
Recode scanner status variable to field strength
Pre-post 2004 data only available for one site. Recode to 1.5T or 3T.
table(df$scanner_status) %>%
kable(col.names = c("Scanner status", "Frequency")) %>%
kable_styling(full_width = F, position = "left")
|
Scanner status
|
Frequency
|
|
1.5T
|
2257
|
|
1.5T_post_2004
|
19
|
|
1.5T_pre_2004
|
447
|
|
3T
|
842
|
df$field_strength <- recode(df$scanner_status, `1.5T_pre_2004` = "1.5T", `1.5T_post_2004` = "1.5T")
table(df$field_strength) %>%
kable(col.names = c("Field strength", "Frequency")) %>%
kable_styling(full_width = F, position = "left")
|
Field strength
|
Frequency
|
|
1.5T
|
2723
|
|
3T
|
842
|
Generate baseline only data.frame and show data frame structure
df.bl <- df[(df$age_at_baseline_scan1 == df$age_at_scan),] # define baseline data.frame
str(df)
## 'data.frame': 3565 obs. of 34 variables:
## $ PatientID : Factor w/ 1367 levels "AMSTERDAM_4001",..: 1 1 1 2 2 2 3 3 3 4 ...
## $ Cohort : Factor w/ 15 levels "Amsterdam","Barcelona",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ scanner_status : Factor w/ 4 levels "1.5T","1.5T_post_2004",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gender : Factor w/ 2 levels "female","male": 2 2 2 1 1 1 1 1 1 1 ...
## $ control0ppms1cisoforever2other3 : int 3 3 3 3 3 3 3 3 3 3 ...
## $ control0rest1 : Factor w/ 2 levels "control","MS": 2 2 2 2 2 2 2 2 2 2 ...
## $ control0ms1cis2 : Factor w/ 3 levels "CIS","control",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ age_at_baseline_scan1 : num 57.8 57.8 57.8 44.4 44.4 ...
## $ disease_duration_at_baseline_scan1: num 6.8 6.8 6.8 6.38 6.38 ...
## $ DMT_YesNoNA : Factor w/ 2 levels "NO","YES": 1 1 1 1 1 1 2 2 2 2 ...
## $ DMTYes1 : Factor w/ 2 levels "No treatment",..: 1 1 1 1 1 1 2 2 2 2 ...
## $ EDSSbaseline : num 2.5 2.5 2.5 3 3 3 4 4 4 2.5 ...
## $ NoScans : int 3 3 3 3 3 3 3 3 3 2 ...
## $ FUTimeMax : num 2.32 2.32 2.32 2.35 2.35 ...
## $ ScanName : Factor w/ 3603 levels "AMSTERDAM_4001_baseline_t1",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ age_at_scan : num 57.8 59.1 60.1 44.4 45.5 ...
## $ EDSSatScan : num 2.5 3.5 3 3 2 2.5 4 2.5 4 2.5 ...
## $ BrainAge : num 69.6 73.4 71 58.1 61.8 ...
## $ BrainPAD : num 11.8 14.3 10.8 13.7 16.3 ...
## $ subtype : Factor w/ 5 levels "control","CIS",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ disease_onset_age : num 51 51 51 38 38 ...
## $ disease_duration : num 6.8 6.8 6.8 6.38 6.38 ...
## $ interval : num 0 1.3 2.32 0 1.16 ...
## $ scan_number : Factor w/ 10 levels "scan1","scan10",..: 1 3 4 1 3 4 1 3 4 1 ...
## $ GM_vol : num 0.574 0.563 0.573 0.622 0.605 0.59 0.647 0.643 0.633 0.648 ...
## $ WM_vol : num 0.536 0.532 0.52 0.43 0.423 0.413 0.404 0.399 0.387 0.599 ...
## $ CSF_vol : num 0.352 0.349 0.367 0.293 0.309 0.333 0.355 0.358 0.386 0.271 ...
## $ WBV : num 1.11 1.09 1.09 1.05 1.03 ...
## $ ICV : num 1.46 1.44 1.46 1.34 1.34 ...
## $ gm_vol_ratio_icv : num 0.393 0.39 0.392 0.462 0.453 ...
## $ wm_vol_ratio_icv : num 0.367 0.368 0.356 0.32 0.316 ...
## $ csf_vol_ratio_icv : num 0.241 0.242 0.251 0.218 0.231 ...
## $ wb_vol_ratio_icv : num 0.759 0.758 0.749 0.782 0.769 ...
## $ field_strength : Factor w/ 2 levels "1.5T","3T": 1 1 1 1 1 1 1 1 1 1 ...
Basic stats
Total number of subjects, and by group
The total number of subjects included was n = 1354
The total number of MS patients (including CIS) was n = 1204 and healthy controls was n = 150
Number of scans in total and per group
Total number of scans = 3565
Number of people with 2 or more scans
df.bl %>%
filter(NoScans >= 2) %>%
group_by(control0rest1) %>%
tally() %>%
kable(col.names = c("Group", "n")) %>%
kable_styling(full_width = F, position = "left")
|
Group
|
n
|
|
control
|
111
|
|
MS
|
1155
|
Number of people with 3 or more scans
df.bl %>%
filter(NoScans >= 3) %>%
group_by(control0rest1) %>%
tally() %>%
kable(col.names = c("Group", "n")) %>%
kable_styling(full_width = F, position = "left")
|
Group
|
n
|
|
control
|
64
|
|
MS
|
509
|
Generate demographics table using qwarps2
options(qwraps2_markup = "markdown", digits = 2)
table1 <-
list("N" =
list("Control" = ~ qwraps2::n_perc0(control0rest1 == "control", na_rm = T),
"MS" = ~ qwraps2::n_perc0(control0rest1 == "MS", na_rm = T)),
"N with follow-up" =
list("Yes" = ~ sum(NoScans>1)),
"Gender" =
list("Female" = ~ qwraps2::n_perc0(gender == "female", show_symbol = T),
"Male" = ~ qwraps2::n_perc0(gender == "male", show_symbol = T)),
"Number of scans" =
list("min" = ~ min(NoScans),
"max" = ~ max(NoScans),
"mean (sd)" = ~ qwraps2::mean_sd(NoScans)),
"Age at baseline scan (years)" =
list("min" = ~ min(age_at_baseline_scan1),
"max" = ~ max(age_at_baseline_scan1),
"mean (sd)" = ~ qwraps2::mean_sd(age_at_baseline_scan1)),
"Brain-predicted age at baseline scan (years)" =
list("min" = ~ min(BrainAge),
"max" = ~ max(BrainAge),
"mean (sd)" = ~ qwraps2::mean_sd(BrainAge)),
"Disease duration at baseline (years)" =
list("min" = ~ min(disease_duration, na.rm = T),
"max" = ~ max(disease_duration, na.rm = T),
"mean (sd)" = ~ qwraps2::mean_sd(disease_duration, na_rm = T, show_n = "never")),
"EDSS at baseline " =
list("min" = ~ min(EDSSbaseline, na.rm = T),
"max" = ~ max(EDSSbaseline, na.rm = T),
"mean (sd)" = ~ qwraps2::mean_sd(EDSSbaseline, na_rm = T, show_n = "never")),
"MS subtype" =
list("CIS" = ~ qwraps2::n_perc0(subtype == "CIS", show_symbol = T),
"RRMS" = ~ qwraps2::n_perc0(subtype == "RRMS", show_symbol = T),
"SPMS" = ~ qwraps2::n_perc0(subtype == "SPMS", show_symbol = T),
"PPMS" = ~ qwraps2::n_perc0(subtype == "PPMS", show_symbol = T)),
"Treatment" =
list("Yes" = ~ qwraps2::n_perc0(DMT_YesNoNA == "YES", na_rm = T, show_symbol = T),
"No" = ~ qwraps2::n_perc0(DMT_YesNoNA == "NO", na_rm = T, show_symbol = T),
"Missing" = ~ sum(is.na(DMT_YesNoNA)))
)
demo.table <- summary_table(dplyr::group_by(df.bl, control0rest1), table1)
print(demo.table, cnames = c("Controls", "MS patients"), rtitle = "Sample Chararcteristics")
| N |
  |
  |
| Â Â Control |
150 (100) |
0 (0) |
| Â Â MS |
0 (0) |
1,204 (100) |
| N with follow-up |
  |
  |
| Â Â Yes |
111 |
1155 |
| Gender |
  |
  |
| Â Â Female |
82 (55%) |
771 (64%) |
| Â Â Male |
68 (45%) |
433 (36%) |
| Number of scans |
  |
  |
| Â Â min |
1 |
1 |
| Â Â max |
10 |
7 |
| Â Â mean (sd) |
2.82 ± 1.90 |
2.61 ± 1.01 |
| Age at baseline scan (years) |
  |
  |
| Â Â min |
23 |
15 |
| Â Â max |
66 |
68 |
| Â Â mean (sd) |
37.29 ± 9.96 |
39.41 ± 10.76 |
| Brain-predicted age at baseline scan (years) |
  |
  |
| Â Â min |
14.5 |
7.4 |
| Â Â max |
70 |
92 |
| Â Â mean (sd) |
38.43 ± 11.12 |
50.27 ± 14.90 |
| Disease duration at baseline (years) |
  |
  |
| Â Â min |
Inf |
0 |
| Â Â max |
-Inf |
48 |
| Â Â mean (sd) |
NaN ± NA |
7.26 ± 7.96 |
| EDSS at baseline |
  |
  |
| Â Â min |
Inf |
0 |
| Â Â max |
-Inf |
9 |
| Â Â mean (sd) |
NaN ± NA |
2.60 ± 1.95 |
| MS subtype |
  |
  |
| Â Â CIS |
0 (0%) |
296 (25%) |
| Â Â RRMS |
0 (0%) |
677 (56%) |
| Â Â SPMS |
0 (0%) |
111 (9%) |
| Â Â PPMS |
0 (0%) |
120 (10%) |
| Treatment |
  |
  |
| Â Â Yes |
0 (NaN%) |
475 (41%) |
| Â Â No |
0 (NaN%) |
675 (59%) |
| Â Â Missing |
150 |
54 |
Length of follow-up
Get length of follow-up from longitudinal database.
df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
slice(which.max(interval)) %>%
group_by(control0rest1) %>%
dplyr::summarise(mean(interval), sd(interval), min(interval), max(interval)) %>%
kable(digits = 2) %>%
kable_styling(full_width = F, position = "left")
|
control0rest1
|
mean(interval)
|
sd(interval)
|
min(interval)
|
max(interval)
|
|
control
|
2.0
|
1.4
|
0.50
|
6
|
|
MS
|
3.4
|
3.1
|
0.23
|
15
|
Demographics by MS subtype
options(qwraps2_markup = "markdown", digits = 2)
table1 <-
list("N with follow-up" =
list("Yes" = ~ sum(NoScans>1)),
"Gender" =
list("Female" = ~ qwraps2::n_perc0(gender == "female", show_symbol = T),
"Male" = ~ qwraps2::n_perc0(gender == "male", show_symbol = T)),
"Number of scans" =
list("min" = ~ min(NoScans),
"max" = ~ max(NoScans),
"mean (sd)" = ~ qwraps2::mean_sd(NoScans)),
"Age at baseline scan (years)" =
list("min" = ~ min(age_at_baseline_scan1),
"max" = ~ max(age_at_baseline_scan1),
"mean (sd)" = ~ qwraps2::mean_sd(age_at_baseline_scan1)),
"Brain-predicted age at baseline scan (years)" =
list("min" = ~ min(BrainAge),
"max" = ~ max(BrainAge),
"mean (sd)" = ~ qwraps2::mean_sd(BrainAge)),
"Disease duration at baseline (years)" =
list("min" = ~ min(disease_duration, na.rm = T),
"max" = ~ max(disease_duration, na.rm = T),
"mean (sd)" = ~ qwraps2::mean_sd(disease_duration, na_rm = T, show_n = "never")),
"EDSS at baseline " =
list("min" = ~ min(EDSSbaseline, na.rm = T),
"max" = ~ max(EDSSbaseline, na.rm = T),
"mean (sd)" = ~ qwraps2::mean_sd(EDSSbaseline, na_rm = T, show_n = "never")),
"Treatment" =
list("Yes" = ~ qwraps2::n_perc0(DMT_YesNoNA == "YES", na_rm = T, show_symbol = T),
"No" = ~ qwraps2::n_perc0(DMT_YesNoNA == "NO", na_rm = T, show_symbol = T),
"Missing" = ~ sum(is.na(DMT_YesNoNA)))
)
print(summary_table(dplyr::group_by(df.bl, subtype), table1),
rtitle = "Sample Chararcteristics",
cnames = c("Controls", "CIS", "RRMS", "SPMS", "PPMS"))
| N with follow-up |
  |
  |
  |
  |
  |
| Â Â Yes |
111 |
279 |
653 |
104 |
119 |
| Gender |
  |
  |
  |
  |
  |
| Â Â Female |
82 (55%) |
199 (67%) |
453 (67%) |
67 (60%) |
52 (43%) |
| Â Â Male |
68 (45%) |
97 (33%) |
224 (33%) |
44 (40%) |
68 (57%) |
| Number of scans |
  |
  |
  |
  |
  |
| Â Â min |
1 |
1 |
1 |
1 |
1 |
| Â Â max |
10 |
5 |
7 |
3 |
5 |
| Â Â mean (sd) |
2.82 ± 1.90 |
2.44 ± 0.98 |
2.71 ± 1.05 |
2.25 ± 0.56 |
2.80 ± 1.07 |
| Age at baseline scan (years) |
  |
  |
  |
  |
  |
| Â Â min |
23 |
15 |
18 |
30 |
19 |
| Â Â max |
66 |
55 |
66 |
68 |
65 |
| Â Â mean (sd) |
37.29 ± 9.96 |
33.01 ± 8.08 |
38.83 ± 9.68 |
50.11 ± 9.39 |
48.55 ± 10.04 |
| Brain-predicted age at baseline scan (years) |
  |
  |
  |
  |
  |
| Â Â min |
14.5 |
13.7 |
7.4 |
40.2 |
31.0 |
| Â Â max |
70 |
82 |
92 |
89 |
84 |
| Â Â mean (sd) |
38.43 ± 11.12 |
37.60 ± 10.01 |
51.33 ± 13.32 |
67.36 ± 10.42 |
59.77 ± 10.90 |
| Disease duration at baseline (years) |
  |
  |
  |
  |
  |
| Â Â min |
Inf |
0.0 |
0.0 |
2.5 |
1.0 |
| Â Â max |
-Inf |
18 |
42 |
48 |
27 |
| Â Â mean (sd) |
NaN ± NA |
0.52 ± 1.50 |
7.67 ± 7.31 |
17.44 ± 9.04 |
6.65 ± 5.63 |
| EDSS at baseline |
  |
  |
  |
  |
  |
| Â Â min |
Inf |
0 |
0 |
3 |
2 |
| Â Â max |
-Inf |
4.5 |
6.5 |
9.0 |
8.0 |
| Â Â mean (sd) |
NaN ± NA |
1.36 ± 1.02 |
2.12 ± 1.40 |
5.83 ± 1.20 |
5.10 ± 1.32 |
| Treatment |
  |
  |
  |
  |
  |
| Â Â Yes |
0 (NaN%) |
60 (20%) |
356 (56%) |
51 (50%) |
8 (7%) |
| Â Â No |
0 (NaN%) |
234 (80%) |
285 (44%) |
52 (50%) |
104 (93%) |
| Â Â Missing |
150 |
2 |
36 |
8 |
8 |
Length of follow-up
Get length of follow-up from longitudinal database.
df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
slice(which.max(interval)) %>%
# top_n(n = 1, wt = interval) %>%
group_by(subtype) %>%
dplyr::summarise(n = n(), mean = mean(interval), SD = sd(interval), min = min(interval), max = max(interval)) %>%
kable(digits = 2) %>%
kable_styling(full_width = F, position = "left")
|
subtype
|
n
|
mean
|
SD
|
min
|
max
|
|
control
|
111
|
2.0
|
1.4
|
0.50
|
6.0
|
|
CIS
|
279
|
3.6
|
4.0
|
0.23
|
15.0
|
|
RRMS
|
653
|
3.6
|
3.1
|
0.45
|
15.0
|
|
SPMS
|
104
|
2.4
|
1.1
|
0.51
|
5.5
|
|
PPMS
|
119
|
2.9
|
1.6
|
0.83
|
6.0
|
Correlations between demographics and clinical variables
Use corrplot package.
cor.mat <- cbind(df.bl$age_at_scan, df.bl$disease_onset_age, df.bl$disease_duration, df.bl$EDSSatScan)
colnames(cor.mat) <- c("Age at scan", "Age at diagnosis", "Time since diagnosis", "EDSS at scan")
corrplot(cor(cor.mat, use = "pairwise"), type = "upper", method = "color", addCoef.col = T, tl.col = "black", diag = F)

Age histograms
p <- ggplot() + xlab("Age (years)") + xlim(c(18,90)) + theme_cowplot()
banc_plot <- p + geom_histogram(aes(x = age), fill = "darkgoldenrod2", binwidth = 2, data = banc) + labs(title = "Training data", subtitle = "Healthy participants")
ms_plot <- p + geom_histogram(aes(x = age_at_scan), fill = "firebrick", binwidth = 2, data = subset(df.bl, df.bl$control0rest1 == "MS")) + labs(title = "Test data", subtitle = "MS and CIS patients")
hc_plot <- p + geom_histogram(aes(x = age_at_scan), fill = "darkgreen", binwidth = 2, data = subset(df.bl, df.bl$control0rest1 == "control")) + labs(title = "Test data", subtitle = "Healthy controls")
p2 <- cowplot::plot_grid(banc_plot, ms_plot, hc_plot, labels = "AUTO", nrow = 1)
p2

cowplot::save_plot("~/Work/Articles/Brain age/MS/figures/BANC_MAGNIMS_age_histograms.pdf", p2, base_asp = 2.5)
Baseline brain-age analysis
describeBy(df.bl$BrainPAD, df.bl$control0rest1, mat = T, digits = 3) # brain-PAD by MS patient vs. controls
## item group1 vars n mean sd median trimmed mad min max range
## X11 1 control 1 150 1.1 6.7 0.64 1.2 6.5 -17 22 39
## X12 2 MS 1 1204 10.9 10.2 9.44 10.2 9.7 -18 49 67
## skew kurtosis se
## X11 0.032 0.072 0.55
## X12 0.600 0.315 0.29
Evalute potential covariates
Distribution of scanner field strengths across study sites.
table(df.bl$Cohort, df.bl$field_strength) %>%
kable() %>%
kable_styling(full_width = F, position = "left", fixed_thead = T)
|
|
1.5T
|
3T
|
|
Amsterdam
|
193
|
0
|
|
Barcelona
|
62
|
90
|
|
UCL0
|
0
|
21
|
|
UCL1
|
148
|
6
|
|
UCL2
|
0
|
16
|
|
UCL3
|
0
|
147
|
|
UCL4
|
0
|
35
|
|
UCL5
|
50
|
0
|
|
UCL6
|
59
|
0
|
|
UCL7
|
34
|
0
|
|
Graz
|
174
|
0
|
|
Imperial
|
0
|
25
|
|
Milan
|
60
|
54
|
|
Rome
|
71
|
0
|
|
Siena
|
109
|
0
|
fit <- lm(BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + ICV + field_strength + Cohort, data = df.bl)
anova(fit)
## Analysis of Variance Table
##
## Response: BrainPAD
## Df Sum Sq Mean Sq F value Pr(>F)
## poly(age_at_baseline_scan1, 2) 2 445 223 2.36 0.0948 .
## gender 1 741 741 7.86 0.0051 **
## ICV 1 12 12 0.13 0.7223
## field_strength 1 693 693 7.34 0.0068 **
## Cohort 14 17705 1265 13.40 <2e-16 ***
## Residuals 1334 125886 94
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Hierarchical partitioning of brain-PAD
a <- lm(BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender + wb_vol_ratio_icv + field_strength + Cohort, data = df.bl)
summary(a)
##
## Call:
## lm(formula = BrainPAD ~ poly(age_at_baseline_scan1, 2) + gender +
## wb_vol_ratio_icv + field_strength + Cohort, data = df.bl)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.64 -4.49 -0.23 4.18 32.06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 142.658 3.724 38.31 < 2e-16 ***
## poly(age_at_baseline_scan1, 2)1 -164.500 8.746 -18.81 < 2e-16 ***
## poly(age_at_baseline_scan1, 2)2 -11.913 7.075 -1.68 0.09247 .
## gendermale -1.159 0.403 -2.87 0.00414 **
## wb_vol_ratio_icv -164.464 4.565 -36.03 < 2e-16 ***
## field_strength3T -1.744 0.826 -2.11 0.03491 *
## CohortBarcelona -3.691 0.917 -4.03 6.0e-05 ***
## CohortUCL0 0.774 1.803 0.43 0.66778
## CohortUCL1 -2.111 0.784 -2.69 0.00716 **
## CohortUCL2 -0.855 2.007 -0.43 0.67018
## CohortUCL3 -3.075 1.124 -2.74 0.00630 **
## CohortUCL4 -5.467 1.521 -3.60 0.00034 ***
## CohortUCL5 -1.756 1.116 -1.57 0.11591
## CohortUCL6 -1.745 1.034 -1.69 0.09173 .
## CohortUCL7 -7.367 1.318 -5.59 2.8e-08 ***
## CohortGraz -4.994 0.770 -6.49 1.2e-10 ***
## CohortImperial -5.469 1.692 -3.23 0.00126 **
## CohortMilan 4.660 0.919 5.07 4.6e-07 ***
## CohortRome -0.604 0.968 -0.62 0.53284
## CohortSiena 4.164 0.852 4.89 1.1e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.9 on 1334 degrees of freedom
## Multiple R-squared: 0.561, Adjusted R-squared: 0.555
## F-statistic: 89.9 on 19 and 1334 DF, p-value: <2e-16
I = independent variance; J = joint variance between >= 2 variables.
h.p <- hier.part(y = df.bl$BrainPAD,
xcan = df.bl[c("age_at_baseline_scan1","gender", "wb_vol_ratio_icv", "field_strength", "Cohort")],
gof = "Rsqu",
barplot = FALSE)
round(h.p$IJ, 3) %>%
kable() %>%
kable_styling(full_width = F, position = "left")
|
|
I
|
J
|
Total
|
|
age_at_baseline_scan1
|
0.06
|
-0.06
|
0.00
|
|
gender
|
0.00
|
0.00
|
0.00
|
|
wb_vol_ratio_icv
|
0.39
|
-0.04
|
0.34
|
|
field_strength
|
0.01
|
0.00
|
0.00
|
|
Cohort
|
0.10
|
0.01
|
0.12
|
Percentage of the explained variance.
round(h.p$I.perc, 1) %>%
kable() %>%
kable_styling(full_width = F, position = "left")
|
|
I
|
|
age_at_baseline_scan1
|
10.0
|
|
gender
|
0.7
|
|
wb_vol_ratio_icv
|
69.3
|
|
field_strength
|
1.1
|
|
Cohort
|
18.8
|
bar.colors <- c("darkgoldenrod2", "firebrick")
barplot(t(as.matrix(h.p$IJ[,1:2])),
names.arg = c("Age", "Sex", "NBV", "Field strength", "Cohort"),
col = bar.colors,
ylim = c(0,0.4),
xlab = "Model predictors",
ylab = expression(paste("Brain-PAD variance explained R"^"2")))
legend("topright", c("Unique variance", "Shared variance"), fill = bar.colors, bty = "n", title = "Legend")

Results suggest that age, age^2, normalised brain volume (AKA wb_vol_ratio_icv), gender, Cohort and field strength are appropriate covariates.
Predict brain-PAD based on group
Function to run a linear mixed effect (LME) model adjusting for: fixed effects of age, age^2, gender and field strength; random effects of cohort.
run_lm <- function(var, data) {
m1 <- lmer(BrainPAD ~ data[[var]] +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
(1|Cohort),
data = data,
control = lmerControl(optimizer = "Nelder_Mead"))
return(m1)
}
Main effect of group (MS vs. controls)
fit <- run_lm("control0rest1", df.bl)
summary(fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 9035
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.138 -0.642 -0.033 0.619 4.726
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 8.64 2.94
## Residual 45.86 6.77
## Number of obs: 1354, groups: Cohort, 15
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 129.672 3.907 1183.794 33.19 < 2e-16 ***
## data[[var]]MS 6.043 0.770 1265.898 7.85 8.8e-15 ***
## poly(age_at_scan, 2)1 -166.368 8.526 1347.000 -19.51 < 2e-16 ***
## poly(age_at_scan, 2)2 -15.356 6.924 1342.944 -2.22 0.027 *
## gendermale -0.936 0.396 1338.609 -2.37 0.018 *
## field_strength3T -1.855 0.735 408.407 -2.52 0.012 *
## wb_vol_ratio_icv -156.850 4.552 1346.999 -34.46 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) d[[]]M p(__,2)1 p(__,2)2 gndrml fld_3T
## dat[[vr]]MS -0.366
## ply(g__,2)1 -0.430 -0.040
## ply(g__,2)2 0.049 -0.063 0.010
## gendermale -0.240 0.070 0.121 0.049
## fld_strng3T -0.026 0.012 -0.009 -0.025 -0.051
## wb_vl_rt_cv -0.961 0.212 0.466 -0.042 0.209 -0.051
Estimated marginal means
Generate EMMs for all MS/CIS and healthy controls. Need to re-fit same model as above so that lme4 object can be read by emmeans.
fit <- lmer(BrainPAD ~ control0rest1 +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
(1|Cohort),
data = df.bl,
control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit, pairwise ~ control0rest1)
## $emmeans
## control0rest1 emmean SE df lower.CL upper.CL
## control 4.2 1.04 37 2.1 6.4
## MS 10.3 0.83 16 8.5 12.1
##
## Results are averaged over the levels of: gender, field_strength
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## control - MS -6 0.77 1261 -7.800 <.0001
##
## Results are averaged over the levels of: gender, field_strength
## Degrees-of-freedom method: kenward-roger
Meta-analysis looking at all the separate cohorts with MS/CIS patients and controls
Check which cohorts contain healthy controls and patients.
table(df.bl$subtype, df.bl$Cohort) %>%
kable() %>%
kable_styling(full_width = F, position = "left")
|
|
Amsterdam
|
Barcelona
|
UCL0
|
UCL1
|
UCL2
|
UCL3
|
UCL4
|
UCL5
|
UCL6
|
UCL7
|
Graz
|
Imperial
|
Milan
|
Rome
|
Siena
|
|
control
|
0
|
0
|
0
|
0
|
0
|
82
|
16
|
17
|
17
|
10
|
0
|
8
|
0
|
0
|
0
|
|
CIS
|
4
|
84
|
0
|
75
|
0
|
0
|
0
|
0
|
0
|
24
|
95
|
0
|
11
|
1
|
2
|
|
RRMS
|
131
|
63
|
21
|
77
|
0
|
28
|
0
|
33
|
0
|
0
|
69
|
16
|
66
|
66
|
107
|
|
SPMS
|
36
|
5
|
0
|
2
|
7
|
23
|
0
|
0
|
0
|
0
|
9
|
1
|
24
|
4
|
0
|
|
PPMS
|
22
|
0
|
0
|
0
|
9
|
14
|
19
|
0
|
42
|
0
|
1
|
0
|
13
|
0
|
0
|
Forest plot of results
plot.forest %<a-% {
forest(meta.results, ilab = cbind(meta.df$MS_n, meta.df$control_n), ilab.xpos = c(-30,-23), slab = meta.df$Cohort, digits = 1, xlab = "MS vs. Healthy control group mean difference", steps = 6, col = "red", cex = 1.25, pch = 22, bg = "blue"); text(c(-40, -30, -23), 7.6, c("Cohort", "MS n", "HC n"), font = 2, cex = 1.25)
}
plot.forest

cairo_pdf("plots/forest_plot.pdf", 6,5)
plot.forest
dev.off()
## quartz_off_screen
## 2
Linear regression analysis in cohort UCL3
Includes covariates: age, age^2, gender.
summary(lm(BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 2) + gender + wb_vol_ratio_icv, data = subset(df.bl, df.bl$Cohort == "UCL3")))
##
## Call:
## lm(formula = BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1,
## 2) + gender + wb_vol_ratio_icv, data = subset(df.bl, df.bl$Cohort ==
## "UCL3"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -20.74 -3.82 0.12 4.08 14.32
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 98.758 9.912 9.96 < 2e-16 ***
## control0rest1MS 9.296 1.331 6.98 1.0e-10 ***
## poly(age_at_baseline_scan1, 2)1 -46.263 7.574 -6.11 9.3e-09 ***
## poly(age_at_baseline_scan1, 2)2 0.207 6.350 0.03 0.97
## gendermale -0.740 1.103 -0.67 0.50
## wb_vol_ratio_icv -121.374 12.063 -10.06 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.3 on 141 degrees of freedom
## Multiple R-squared: 0.672, Adjusted R-squared: 0.661
## F-statistic: 57.9 on 5 and 141 DF, p-value: <2e-16
Brain-PAD by MS subtype
Using a linear mixed effect (LME) model, to compare subtypes. This analysis excluded controls. Adjusting for age, age^2, gender, field strength, normalised brain volume and cohort.
subtypes <- run_lm("subtype", subset(df.bl, df.bl$subtype != "control"))
summary(subtypes)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 7970
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.860 -0.652 -0.013 0.607 4.523
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 5.35 2.31
## Residual 43.86 6.62
## Number of obs: 1204, groups: Cohort, 15
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 126.193 3.956 1133.060 31.90 < 2e-16 ***
## data[[var]]RRMS 5.122 0.583 927.421 8.78 < 2e-16 ***
## data[[var]]SPMS 6.585 0.938 1127.828 7.02 3.8e-12 ***
## data[[var]]PPMS 4.507 1.069 370.482 4.21 3.2e-05 ***
## poly(age_at_scan, 2)1 -172.683 8.698 1193.855 -19.85 < 2e-16 ***
## poly(age_at_scan, 2)2 -16.532 6.884 1189.035 -2.40 0.016 *
## gendermale -0.764 0.415 1188.498 -1.84 0.066 .
## field_strength3T -0.626 0.713 230.278 -0.88 0.381
## wb_vol_ratio_icv -150.823 4.756 1194.873 -31.71 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) d[[]]R d[[]]S d[[]]P p(__,2)1 p(__,2)2 gndrml fld_3T
## dt[[v]]RRMS -0.282
## dt[[v]]SPMS -0.334 0.560
## dt[[v]]PPMS -0.194 0.498 0.478
## ply(g__,2)1 -0.365 -0.063 -0.182 -0.212
## ply(g__,2)2 0.056 0.051 -0.108 -0.096 0.055
## gendermale -0.218 0.010 -0.007 -0.104 0.129 0.059
## fld_strng3T -0.073 0.202 0.058 0.120 -0.021 -0.010 -0.053
## wb_vl_rt_cv -0.974 0.162 0.258 0.098 0.398 -0.060 0.197 -0.016
anova(subtypes)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## data[[var]] 3701 1234 3 773 28.13 <2e-16 ***
## poly(age_at_scan, 2) 17365 8683 2 1191 197.95 <2e-16 ***
## gender 149 149 1 1188 3.39 0.066 .
## field_strength 34 34 1 230 0.77 0.381
## wb_vol_ratio_icv 44108 44108 1 1195 1005.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Brain-PAD estimated marginal means for subtypes
Generate EMMs for all MS subtypes.
fit_subtype <- lmer(BrainPAD ~ subtype +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
(1|Cohort),
# data = subset(df.bl, df.bl$control0rest1 != "control"),
data = df.bl,
control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit_subtype, pairwise ~ subtype)
## $emmeans
## subtype emmean SE df lower.CL upper.CL
## control 4.3 0.90 50 2.5 6.1
## CIS 6.4 0.80 33 4.8 8.1
## RRMS 11.6 0.69 20 10.1 13.0
## SPMS 13.0 0.95 72 11.1 14.9
## PPMS 10.4 0.97 64 8.5 12.4
##
## Results are averaged over the levels of: gender, field_strength
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## control - CIS -2.2 0.92 724 -2.400 0.1300
## control - RRMS -7.3 0.81 868 -9.000 <.0001
## control - SPMS -8.7 1.01 1227 -8.600 <.0001
## control - PPMS -6.2 0.95 1296 -6.500 <.0001
## CIS - RRMS -5.1 0.58 1100 -8.900 <.0001
## CIS - SPMS -6.6 0.92 1295 -7.200 <.0001
## CIS - PPMS -4.0 1.01 659 -4.000 <.0001
## RRMS - SPMS -1.4 0.76 1342 -1.900 0.3200
## RRMS - PPMS 1.1 0.87 700 1.300 0.6900
## SPMS - PPMS 2.6 0.99 1110 2.600 0.0700
##
## Results are averaged over the levels of: gender, field_strength
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 5 estimates
Save table for importing into Word
x <- as.data.frame(emmeans(object = fit_subtype, pairwise ~ subtype)$contrasts)
write.table(format(x, digits = 3), file = "~/Work/Articles/Brain age/MS/Annals Neurology/pairwise_brain_PAD.txt", sep = ",", quote = F, row.names = F)
Plot the EMMs and confidence intervals
plot_model(fit_subtype, type = "pred", terms = c("subtype")) +
theme_cowplot()

Brain-PAD boxplot by MS subtype
# calculate Ns
control_n <- with(df.bl, table(subtype))["control"]
cis_n <- with(df.bl, table(subtype))["CIS"]
rrms_n <- with(df.bl, table(subtype))["RRMS"]
spms_n <- with(df.bl, table(subtype))["SPMS"]
ppms_n <- with(df.bl, table(subtype))["PPMS"]
plot.pryr %<a-% {
with(df.bl, ehplot(BrainPAD, groups = subtype,
ylim = c(-20,52), pch = as.numeric(subtype) + 20, bg = ms.palette[as.numeric(subtype)], box = T, offset = 0.1, intervals = 50,
xlab = "Groups", ylab = "Brain-PAD (years)", main = "Baseline Brain-PAD by study group")) +
abline(0,0, lty = 2) +
text(1,51, paste("N = ", control_n, sep = ""), cex = 0.8) +
text(2,51, paste("N = ", cis_n, sep = ""), cex = 0.8) +
text(3,51, paste("N = ", rrms_n, sep = ""), cex = 0.8) +
text(4,51, paste("N = ", spms_n, sep = ""), cex = 0.8) +
text(5,51, paste("N = ", ppms_n, sep = ""), cex = 0.8)
}
plot.pryr

## integer(0)
cairo_pdf(filename = "~/Work/Brain ageing/Collaborations/MS/plots/MS_UCL_MAGNIMS_combined_group_Brain-PAD.pdf", width = 8, height = 8)
plot.pryr
## integer(0)
dev.off()
## quartz_off_screen
## 2
Brain-PAD boxplot by cohort and by MS subtype
Also, Estimate marginal means plot
Uses sjPlot to calculated EMMs from linear regression model
p1 <- ggplot(df.bl, aes(x = subtype, y = BrainPAD, fill = subtype)) +
geom_boxplot(outlier.size = 0.75) +
facet_wrap(df.bl$Cohort, nrow = 1) +
geom_hline(aes(yintercept = 0), lty = 2) +
theme_cowplot(font_size = 6) +
labs(x = "MS subtype", y = "Brain-PAD (years)") +
theme(axis.text.x = element_text(angle = 90, hjust = 1), legend.position = "none", axis.title.x = element_text(vjust = -3), axis.title.y = element_text(vjust = 3)) +
scale_fill_manual(values = ms.palette)
fit <- lm(BrainPAD ~ control0rest1 + poly(age_at_baseline_scan1, 2) + gender + field_strength + wb_vol_ratio_icv + Cohort, data = df.bl)
p2 <- plot_model(fit, type = "emm", terms = c("Cohort", "control0rest1"), colors = "bw", show.legend = F) +
labs(x = "Cohort", y = "Brain-PAD (years)") +
theme_cowplot(font_size = 6) +
theme(axis.title.x = element_text(vjust = -3), axis.title.y = element_text(vjust = 3))
p3 <- plot_grid(list(p1,p2), tags = T, margin = c(0.5,0.5,0.5,0.5))

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/brainPAD_cohort_subtype_boxplot_EMM_plot.pdf", width = 15, height = 10, useDingbats = FALSE)
ggsave(plot = p3, "~/Work/Articles/Brain age/MS/figures/brainPAD_cohort_subtype_boxplot_EMM_plot.tif", width = 170, height = 120, device = "tiff", units = "mm")
Brain-PAD boxplot by MS subtype in cohort UCL3 only
# calculate Ns
C3.bl <- df.bl %>% filter(Cohort == "UCL3")
C3.bl$subtype <- factor(C3.bl$subtype)
control_n <- with(C3.bl, table(subtype))["control"]
rrms_n <- with(C3.bl, table(subtype))["RRMS"]
spms_n <- with(C3.bl, table(subtype))["SPMS"]
ppms_n <- with(C3.bl, table(subtype))["PPMS"]
plot.pryr %<a-% {
with(C3.bl, ehplot(BrainPAD, groups = subtype, ylim = c(-20,52), pch = as.numeric(subtype) + 20, bg = ms.palette[-2][as.numeric(subtype)], box = T, offset = 0.1, intervals = 50, xlab = "Groups", ylab = "Brain-PAD (years)", main = "Baseline Brain-PAD by study group")) +
abline(0,0, lty = 2) +
text(1,51, paste("N = ", control_n, sep = ""), cex = 1.2) +
text(2,51, paste("N = ", rrms_n, sep = ""), cex = 1.2) +
text(3,51, paste("N = ", spms_n, sep = ""), cex = 1.2) +
text(4,51, paste("N = ", ppms_n, sep = ""), cex = 1.2)
}
plot.pryr

## integer(0)
cairo_pdf("~/Work/Brain ageing/Collaborations/MS/plots/MS_cohort_C3_Brain-PAD.pdf", 8,8)
plot.pryr
## integer(0)
dev.off()
## quartz_off_screen
## 2
Cohort UCL3 statistics
Use OLS regression, no random effects necessary.
fit_UCL3 <- lm(BrainPAD ~ subtype +
poly(age_at_scan, 2) +
gender +
wb_vol_ratio_icv,
data = subset(df.bl, df.bl$Cohort == "UCL3"))
anova(fit_UCL3)
## Analysis of Variance Table
##
## Response: BrainPAD
## Df Sum Sq Mean Sq F value Pr(>F)
## subtype 3 7251 2417 60.49 <2e-16 ***
## poly(age_at_scan, 2) 2 493 246 6.17 0.0027 **
## gender 1 147 147 3.67 0.0573 .
## wb_vol_ratio_icv 1 3639 3639 91.07 <2e-16 ***
## Residuals 139 5553 40
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
emmeans(object = fit_UCL3, pairwise ~ subtype)
## $emmeans
## subtype emmean SE df lower.CL upper.CL
## control 3.0 1.00 139 1.1 5.0
## RRMS 11.7 1.34 139 9.0 14.3
## SPMS 13.7 1.70 139 10.3 17.0
## PPMS 12.7 1.82 139 9.1 16.3
##
## Results are averaged over the levels of: gender
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## control - RRMS -8.6 1.55 139 -5.600 <.0001
## control - SPMS -10.6 1.95 139 -5.500 <.0001
## control - PPMS -9.7 2.00 139 -4.900 <.0001
## RRMS - SPMS -2.0 1.93 139 -1.000 0.7400
## RRMS - PPMS -1.1 2.15 139 -0.500 0.9600
## SPMS - PPMS 0.9 2.21 139 0.400 0.9800
##
## Results are averaged over the levels of: gender
## P value adjustment: tukey method for comparing a family of 4 estimates
Brain-PAD by subtype descriptive statistics
# brain-PAD by MS patient subtypes and controls
with(df.bl, describeBy(BrainPAD, subtype, mat = T, digits = 1)[-1]) %>%
kable() %>%
kable_styling(full_width = F, position = "left")
|
|
group1
|
vars
|
n
|
mean
|
sd
|
median
|
trimmed
|
mad
|
min
|
max
|
range
|
skew
|
kurtosis
|
se
|
|
X11
|
control
|
1
|
150
|
1.1
|
6.7
|
0.6
|
1.2
|
6.5
|
-16.5
|
22
|
39
|
0.0
|
0.1
|
0.6
|
|
X12
|
CIS
|
1
|
296
|
4.6
|
7.2
|
4.3
|
4.3
|
6.5
|
-13.1
|
32
|
44
|
0.5
|
0.9
|
0.4
|
|
X13
|
RRMS
|
1
|
677
|
12.5
|
10.0
|
11.1
|
12.0
|
9.4
|
-18.2
|
49
|
67
|
0.5
|
0.3
|
0.4
|
|
X14
|
SPMS
|
1
|
111
|
17.3
|
10.5
|
16.9
|
17.1
|
11.1
|
-6.7
|
44
|
51
|
0.2
|
-0.4
|
1.0
|
|
X15
|
PPMS
|
1
|
120
|
11.2
|
10.3
|
10.3
|
10.7
|
10.6
|
-5.3
|
46
|
52
|
0.5
|
0.1
|
0.9
|
Lesion filling
To establish whether using the FSL lesion filling software influences brain-predicted age values. This analysis was conducted only in UCL patients (n = 575).
with(lesion_df, describeBy(filled_brain_age, subtype, mat = T)[-1,-1]) %>%
kable() %>%
kable_styling()
|
|
group1
|
vars
|
n
|
mean
|
sd
|
median
|
trimmed
|
mad
|
min
|
max
|
range
|
skew
|
kurtosis
|
se
|
|
X12
|
CIS
|
1
|
8
|
42
|
6.2
|
43
|
42
|
7.9
|
34
|
50
|
15
|
-0.09
|
-1.98
|
2.18
|
|
X13
|
RRMS
|
1
|
382
|
55
|
11.5
|
55
|
55
|
12.1
|
24
|
88
|
63
|
-0.03
|
-0.44
|
0.59
|
|
X14
|
SPMS
|
1
|
119
|
65
|
9.2
|
66
|
65
|
8.4
|
40
|
82
|
41
|
-0.74
|
-0.03
|
0.85
|
|
X15
|
PPMS
|
1
|
66
|
62
|
10.1
|
60
|
62
|
10.6
|
44
|
84
|
40
|
0.46
|
-0.61
|
1.25
|
Correlation between brain-predicted age from filled and unfilled images: Pearson’s r = 0.99. Median absolute error (MAE) between brain-predicted age from filled and unfilled images = 0.37 years. Mean difference between brain-predicted age from filled and unfilled images = 0.28 ± 1.29 years.
Lesion filled vs. unfilled scatterplot
ggplot(data = subset(lesion_df, lesion_df$subtype != "control"), aes(x = brain_age, y = filled_brain_age)) +
geom_abline(slope = 1) +
geom_point(aes(shape = subtype, fill = subtype), size = 2) +
labs(x = "Unfilled brain-predicted age (years)", y = "Lesion-filled brain-predicted age (years)") +
xlim(c(20,90)) +
scale_fill_manual(values = ms.palette[-1]) +
scale_shape_manual(values = c(22:25)) +
theme_cowplot() + theme(legend.position = c(0.8, 0.2))
## Warning: Removed 1441 rows containing missing values (geom_point).

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/lesion_filling_brain_age_plot.pdf", width = 5, height = 5, useDingbats = FALSE)
## Warning: Removed 1441 rows containing missing values (geom_point).
Lesion filled vs. unfilled Bland-Altman plot
mean.diff <- mean(lesion_df$brain_age - lesion_df$filled_brain_age, na.rm = T)
sd.diff <- sd(lesion_df$brain_age - lesion_df$filled_brain_age, na.rm = T)
ggplot(data = subset(lesion_df, lesion_df$subtype != "control"), aes(x = ((brain_age + filled_brain_age)/2), y = brain_age - filled_brain_age)) +
geom_abline(slope = 0, lty = 2) +
geom_point(aes(shape = subtype, fill = subtype), size = 2) +
geom_hline(yintercept = mean.diff, color = "darkgoldenrod1", lwd = 1) + # mean difference line
geom_hline(yintercept = mean.diff + 1.96*sd.diff, color = "darkgoldenrod2", lty = 2) + # upper 95% line
geom_hline(yintercept = mean.diff - 1.96*sd.diff, color = "darkgoldenrod2", lty = 2) + # lower 95% line
# geom_smooth(method = "lm", level = 0.95, color = "black", lwd = 0.3) +
labs(x = "Mean of filled/unfilled brain-predicted age (years)", y = "Unfilled - Lesion-filled brain-predicted age (years)") +
# ylim(c(-20,20)) +
scale_fill_manual(values = ms.palette[-1]) +
scale_shape_manual(values = c(22:25)) +
theme_cowplot() + theme(legend.position = c(0.1, 0.8)) +
annotate("text", x = 25, y = mean.diff + 1.96*sd.diff + 0.5, label = "+1.96*SD", color = "darkgoldenrod2") +
annotate("text", x = 25, y = mean.diff - 1.96*sd.diff - 0.5, label = "-1.96*SD", color = "darkgoldenrod2") +
annotate("text", x = 27.5, y = mean.diff + 0.8, label = "Mean difference", color = "darkgoldenrod2")
## Warning: Removed 1441 rows containing missing values (geom_point).

ggsave("~/Work/Brain ageing/Collaborations/MS/plots/lesion_filling_brain_age_BA_plot.pdf", width = 8, height = 5, useDingbats = FALSE)
## Warning: Removed 1441 rows containing missing values (geom_point).
Correlates of brain-PAD at baseline
EDSS score, an index of disability
LME model accounting for fixed effects of age at baseline, age^2, gender, field strength, normalised brain volume and random effects of Cohort.
summary(run_lm("EDSSbaseline", df.bl))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 7813
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.989 -0.634 -0.047 0.611 4.819
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 8.43 2.90
## Residual 45.05 6.71
## Number of obs: 1174, groups: Cohort, 14
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 133.865 4.122 1061.648 32.47 < 2e-16 ***
## data[[var]] 0.637 0.142 1116.023 4.50 7.6e-06 ***
## poly(age_at_scan, 2)1 -186.531 9.346 1165.039 -19.96 < 2e-16 ***
## poly(age_at_scan, 2)2 -25.621 7.329 1160.993 -3.50 0.00049 ***
## gendermale -0.958 0.422 1159.898 -2.27 0.02328 *
## field_strength3T -1.790 0.734 396.618 -2.44 0.01512 *
## wb_vol_ratio_icv -156.427 4.924 1165.186 -31.77 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.374
## ply(g__,2)1 -0.333 -0.224
## ply(g__,2)2 0.082 -0.127 0.052
## gendermale -0.221 0.007 0.108 0.049
## fld_strng3T -0.035 0.031 -0.012 -0.028 -0.051
## wb_vl_rt_cv -0.972 0.290 0.366 -0.076 0.196 -0.041
When predicting brain-PAD in a LME model, the effect of EDSS at baseline beta = 0.64, 95% CI = 0.36, 0.91, p = 8e-06.
Testing for a polynomial fit of EDSS baseline
fit.edss.poly <- lmer(BrainPAD ~
poly(EDSSbaseline,2) +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
(1|Cohort),
data = subset(df.bl, !is.na(df.bl$EDSSbaseline)),
control = lmerControl(optimizer = "Nelder_Mead"))
summary(fit.edss.poly)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ poly(EDSSbaseline, 2) + poly(age_at_scan, 2) + gender +
## field_strength + wb_vol_ratio_icv + (1 | Cohort)
## Data: subset(df.bl, !is.na(df.bl$EDSSbaseline))
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 7797
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.926 -0.628 -0.053 0.607 4.821
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 8.42 2.90
## Residual 44.99 6.71
## Number of obs: 1174, groups: Cohort, 14
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 135.338 3.993 1050.157 33.89 < 2e-16 ***
## poly(EDSSbaseline, 2)1 43.388 9.451 1113.921 4.59 4.9e-06 ***
## poly(EDSSbaseline, 2)2 -11.381 7.043 1165.171 -1.62 0.10637
## poly(age_at_scan, 2)1 -175.130 8.764 1164.110 -19.98 < 2e-16 ***
## poly(age_at_scan, 2)2 -23.270 6.916 1159.298 -3.36 0.00079 ***
## gendermale -0.954 0.421 1158.860 -2.26 0.02371 *
## field_strength3T -1.799 0.733 395.462 -2.45 0.01455 *
## wb_vol_ratio_icv -156.399 4.920 1164.186 -31.79 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) p(EDSS,2)1 p(EDSS,2)2 p(__,2)1 p(__,2)2 gndrml fld_3T
## pl(EDSS,2)1 -0.294
## pl(EDSS,2)2 0.002 -0.061
## ply(g__,2)1 -0.363 -0.224 0.026
## ply(g__,2)2 0.073 -0.122 -0.076 0.040
## gendermale -0.228 0.007 -0.005 0.108 0.050
## fld_strng3T -0.033 0.030 0.008 -0.012 -0.029 -0.051
## wb_vl_rt_cv -0.975 0.289 -0.004 0.367 -0.076 0.196 -0.042
Non-parametric correlation between brain-PAD and EDSS at baseline
with(df.bl, cor.test(BrainPAD, EDSSbaseline, method = "spearman"))
## Warning in cor.test.default(BrainPAD, EDSSbaseline, method = "spearman"):
## Cannot compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: BrainPAD and EDSSbaseline
## S = 2e+08, p-value <2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.26
Plot EDSS histogram by subtype
ggplot(subset(df.bl, !is.na(df.bl$EDSSbaseline)), aes(EDSSbaseline, fill = subtype)) +
scale_fill_manual(values = ms.palette[-1]) +
geom_histogram(bins = 20) +
facet_wrap(~ subtype) +
theme_cowplot()

Test for interaction between subtype and EDSS on brain-PAD
fit.edss <- lmer(BrainPAD ~ EDSSbaseline * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control"))
round(as.matrix(anova(fit.edss)["EDSSbaseline:subtype",]),3) %>%
kable(digits = 2) %>%
kable_styling()
|
|
Sum Sq
|
Mean Sq
|
NumDF
|
DenDF
|
F value
|
Pr(>F)
|
|
EDSSbaseline:subtype
|
138
|
46
|
3
|
1158
|
1.1
|
0.36
|
Use simple slopes from jtools to extract adjusted slopes for each subtype. Need to fit model without age^2 as poly() is incompatible with sim_slopes().
fit.edss2 <- lmer(BrainPAD ~ EDSSbaseline * subtype + age_at_baseline_scan1 + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control"))
sim_slopes(fit.edss2, pred = "EDSSbaseline", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS
##
## Slope of EDSSbaseline when subtype = CIS:
## Est. S.E. df p
## 0.15 0.40 0.37 0.71
##
## Slope of EDSSbaseline when subtype = RRMS:
## Est. S.E. df p
## 0.79 0.21 3.78 0.00
##
## Slope of EDSSbaseline when subtype = SPMS:
## Est. S.E. df p
## 0.20 0.53 0.38 0.71
##
## Slope of EDSSbaseline when subtype = PPMS:
## Est. S.E. df p
## 0.27 0.46 0.58 0.56
Use interact_plot() from jtools to plot the adjusted slopes per group.
edss.plot <- interact_plot(fit.edss2, pred = "EDSSbaseline", modx = "subtype",
plot.points = T,
interval = T,
vary.lty = T,
facet.modx = T,
x.label = "EDSS", y.label = "Brain-PAD (years)",
point.size = 0.5,
modx.labels = c("CIS", "RRMS", "SPMS", "PPMS")) +
geom_hline(yintercept = 0, lty = 2) +
scale_fill_manual(values = ms.palette[2:5], name = "MS subtype") +
scale_color_manual(values = ms.palette[2:5], name = "MS subtype") +
theme_cowplot(font_size = 9)
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/EDSS_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
Age at diagnosis
LME accounting for fixed effects of age at baseline, age^2, gender, and random effects of Cohort and field strength. Exclude CIS patients and healthy controls.
summary(run_lm("disease_onset_age", subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS")))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 5947
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.729 -0.605 -0.035 0.611 4.234
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 10.9 3.30
## Residual 42.7 6.53
## Number of obs: 900, groups: Cohort, 14
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 134.7440 4.0645 766.4048 33.15 < 2e-16 ***
## data[[var]] -0.1611 0.0359 891.8587 -4.49 8.2e-06 ***
## poly(age_at_scan, 2)1 -117.9941 11.9737 892.6532 -9.85 < 2e-16 ***
## poly(age_at_scan, 2)2 -13.8667 6.6754 883.2836 -2.08 0.0381 *
## gendermale -0.1854 0.4704 881.6072 -0.39 0.6936
## field_strength3T 2.7825 0.9648 189.7490 2.88 0.0044 **
## wb_vol_ratio_icv -151.5788 5.2407 888.6360 -28.92 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.005
## ply(g__,2)1 -0.278 -0.743
## ply(g__,2)2 0.047 0.036 -0.033
## gendermale -0.217 -0.086 0.125 0.067
## fld_strng3T -0.097 0.062 -0.055 -0.004 -0.042
## wb_vl_rt_cv -0.926 -0.287 0.492 -0.060 0.199 -0.010
When predicting brain-PAD in a LME model, the effect of age at diagnosis at baseline beta = -0.16, 95% CI = -0.23, -0.09, p = 8e-06.
Test for interaction between subtype and age at diagnosis on brain-PAD:
fit.age <- lmer(BrainPAD ~ disease_onset_age * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS"))
round(as.matrix(anova(fit.age)["disease_onset_age:subtype",]),3) %>%
kable(digits = 2) %>%
kable_styling()
|
|
Sum Sq
|
Mean Sq
|
NumDF
|
DenDF
|
F value
|
Pr(>F)
|
|
disease_onset_age:subtype
|
51
|
26
|
2
|
877
|
0.6
|
0.55
|
Use simple slopes from jtools to extract adjusted slopes for each subtype.
fit.age2 <- lmer(BrainPAD ~ disease_onset_age * subtype + age_at_baseline_scan1 + gender + field_strength + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS"))
sim_slopes(fit.age2, pred = "disease_onset_age", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS
##
## Slope of disease_onset_age when subtype = RRMS:
## Est. S.E. df p
## -0.37 0.05 -6.75 0.00
##
## Slope of disease_onset_age when subtype = SPMS:
## Est. S.E. df p
## -0.57 0.09 -6.39 0.00
##
## Slope of disease_onset_age when subtype = PPMS:
## Est. S.E. df p
## -0.51 0.10 -5.35 0.00
age.plot <- interact_plot(fit.age2, pred = "disease_onset_age", modx = "subtype",
plot.points = T, interval = T, vary.lty = T, facet.modx = T,
x.label = "Age at clincial diagnosis (years)", y.label = "Brain-PAD (years)",
point.size = 0.5,
modx.labels = c("RRMS", "SPMS", "PPMS")) +
geom_hline(yintercept = 0, lty = 2) +
scale_fill_manual(values = ms.palette[3:5], name = "MS subtype") +
scale_color_manual(values = ms.palette[3:5], name = "MS subtype") +
theme_cowplot(font_size = 9)
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/diagnosis_age_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
Time since diagnosis
LME accounting for fixed effects of age at baseline, age^2 gender, and random effects of Cohort and field strength. Exclude controls, CIS patients and anyone with a time since diagnosis = 0.
summary(run_lm("disease_duration_at_baseline_scan1", subset(df.bl, df.bl$subtype != "CIS" & df.bl$disease_duration > 0)))
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ data[[var]] + poly(age_at_scan, 2) + gender + field_strength +
## wb_vol_ratio_icv + (1 | Cohort)
## Data: data
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 5677
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.781 -0.601 -0.036 0.614 4.185
##
## Random effects:
## Groups Name Variance Std.Dev.
## Cohort (Intercept) 10.8 3.28
## Residual 43.4 6.59
## Number of obs: 857, groups: Cohort, 14
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 128.0279 4.3956 774.8252 29.13 < 2e-16 ***
## data[[var]] 0.1558 0.0364 849.2322 4.28 2.1e-05 ***
## poly(age_at_scan, 2)1 -165.1610 8.3879 849.5646 -19.69 < 2e-16 ***
## poly(age_at_scan, 2)2 -13.7866 6.7148 839.9938 -2.05 0.0404 *
## gendermale -0.1343 0.4841 838.7126 -0.28 0.7815
## field_strength3T 2.6556 0.9757 181.8368 2.72 0.0071 **
## wb_vol_ratio_icv -151.6503 5.3483 845.1505 -28.35 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) dt[[]] p(__,2)1 p(__,2)2 gndrml fld_3T
## data[[var]] -0.339
## ply(g__,2)1 -0.266 -0.317
## ply(g__,2)2 0.058 -0.024 -0.008
## gendermale -0.222 0.082 0.062 0.060
## fld_strng3T -0.067 -0.053 0.009 -0.006 -0.043
## wb_vl_rt_cv -0.971 0.283 0.303 -0.063 0.187 -0.014
When predicting brain-PAD in a LME model, the effect of time since diagnosis at baseline beta = 0.16, 95% CI = 0.08, 0.23, p = 2e-05.
Test for interaction between subtype and time since diagnosis on brain-PAD
fit.time <- lmer(BrainPAD ~ disease_duration_at_baseline_scan1 * subtype + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS" & df.bl$disease_duration > 0))
round(as.matrix(anova(fit.time)["disease_duration_at_baseline_scan1:subtype",]),3) %>%
kable(digits = 2) %>%
kable_styling()
|
|
Sum Sq
|
Mean Sq
|
NumDF
|
DenDF
|
F value
|
Pr(>F)
|
|
disease_duration_at_baseline_scan1:subtype
|
84
|
42
|
2
|
797
|
0.97
|
0.38
|
Use simple slopes from jtools to extract adjusted slopes for each subtype.
fit.time2 <- lmer(BrainPAD ~ disease_duration_at_baseline_scan1 * subtype + age_at_baseline_scan1 + gender + field_strength + wb_vol_ratio_icv + (1|Cohort), data = subset(df.bl, df.bl$subtype != "control" & df.bl$subtype != "CIS" & df.bl$disease_duration > 0))
sim_slopes(fit.time2, pred = "disease_duration_at_baseline_scan1", modx = "subtype", johnson_neyman = F)
## SIMPLE SLOPES ANALYSIS
##
## Slope of disease_duration_at_baseline_scan1 when subtype = RRMS:
## Est. S.E. df p
## 0.19 0.04 4.29 0.00
##
## Slope of disease_duration_at_baseline_scan1 when subtype = SPMS:
## Est. S.E. df p
## 0.08 0.07 1.06 0.29
##
## Slope of disease_duration_at_baseline_scan1 when subtype = PPMS:
## Est. S.E. df p
## 0.02 0.13 0.12 0.90
time.plot <- interact_plot(fit.time2, pred = "disease_duration_at_baseline_scan1", modx = "subtype",
plot.points = T, interval = T, vary.lty = T, facet.modx = T, point.size = 0.5,
x.label = "Time since clinical diagnosis (years)", y.label = "Brain-PAD (years)",
modx.labels = c("RRMS", "SPMS", "PPMS")) +
geom_hline(yintercept = 0, lty = 2) +
scale_fill_manual(values = ms.palette[3:5], name = "MS subtype") +
scale_color_manual(values = ms.palette[3:5], name = "MS subtype") +
theme_cowplot(font_size = 9)
# ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/diagnosis_years_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
Arrange EDSS, age at diagnosis and time since diagnosis plots
Use cowplot package.
cowplot::plot_grid(edss.plot, age.plot, time.plot, labels = "AUTO", ncol = 1)

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/clinical_brain-PAD_plots.pdf", height = 15, width = 8, useDingbats = FALSE)
ggsave(filename = "~/Work/Articles/Brain age/MS/figures/Fig2_clinical_brain-PAD_plots.tif", height = 160, width = 80, device = "tiff", units = "mm", dpi = 300)
Brain age adjusted for brain volumes
Function to get standardised beta coefficients from lmer, from stack exchange: https://stats.stackexchange.com/questions/123366/lmer-standardized-regression-coefficients
lm.beta.lmer <- function(mod) {
b <- fixef(mod)[-1]
sd.x <- apply(getME(mod,"X")[,-1],2,sd)
sd.y <- sd(getME(mod,"y"))
b*sd.x/sd.y
}
Baseline clinical measures, adjusting for whole-brain volume as ratio of ICV, along with standard covariates.
EDSS_baseline <- lm.beta.lmer(lmer(EDSSbaseline ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
Time_since_diagnosis <- lm.beta.lmer(model <- lmer(disease_duration ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
Age_at_onset <- lm.beta.lmer(lmer(disease_onset_age ~ BrainPAD + wb_vol_ratio_icv + poly(age_at_scan, 2) + gender + field_strength + (1|Cohort), data = df.bl, control = lmerControl(optimizer = "Nelder_Mead")))
round(as.data.frame(rbind(EDSS_baseline, Time_since_diagnosis, Age_at_onset)),3) %>%
kable() %>%
kable_styling(full_width = F, position = "left")
|
|
BrainPAD
|
wb_vol_ratio_icv
|
poly(age_at_scan, 2)1
|
poly(age_at_scan, 2)2
|
gendermale
|
field_strength3T
|
|
EDSS_baseline
|
0.14
|
-0.14
|
0.26
|
0.10
|
0.00
|
-0.07
|
|
Time_since_diagnosis
|
0.24
|
-0.10
|
0.41
|
0.07
|
-0.04
|
-0.06
|
|
Age_at_onset
|
-0.20
|
0.09
|
0.77
|
-0.06
|
0.03
|
0.04
|
DMT
Effects of disease modifying treatment on brain-PAD.
table(df.bl$DMT_YesNoNA, df.bl$subtype) %>%
kable() %>%
kable_styling(full_width = F, position = "left")
|
|
control
|
CIS
|
RRMS
|
SPMS
|
PPMS
|
|
NO
|
0
|
234
|
285
|
52
|
104
|
|
YES
|
0
|
60
|
356
|
51
|
8
|
Descriptive statistics brain-PAD by DMT status
df.bl %>%
filter(control0rest1 == "MS") %>%
group_by(DMT_YesNoNA) %>%
dplyr::summarise(n = n(), mean = mean(BrainPAD), sd = sd(BrainPAD), min = min(BrainPAD), max = max(BrainPAD)) %>%
kable(digits = 2) %>%
kable_styling(full_width = F, position = "left")
|
DMT_YesNoNA
|
n
|
mean
|
sd
|
min
|
max
|
|
NO
|
675
|
8.2
|
9.1
|
-13.07
|
47
|
|
YES
|
475
|
14.2
|
10.7
|
-18.18
|
49
|
|
NA
|
54
|
14.8
|
9.9
|
-0.73
|
42
|
Fit LME model
fit_DMT <- lmer(BrainPAD ~
subtype +
poly(age_at_scan, 2) +
gender +
field_strength +
wb_vol_ratio_icv +
DMT_YesNoNA +
(1|Cohort),
data = df.bl,
control = lmerControl(optimizer = "Nelder_Mead"))
emmeans(object = fit_DMT, pairwise ~ DMT_YesNoNA)
## $emmeans
## DMT_YesNoNA emmean SE df lower.CL upper.CL
## NO 10.2 0.70 21.3 8.8 11.6
## YES 12.1 0.75 27.6 10.6 13.7
##
## Results are averaged over the levels of: subtype, gender, field_strength
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
##
## $contrasts
## contrast estimate SE df t.ratio p.value
## NO - YES -1.91 0.49 1072 -3.900 0.0001
##
## Results are averaged over the levels of: subtype, gender, field_strength
## Degrees-of-freedom method: kenward-roger
EDSS progression survival analysis
Based on Arman Eshaghi’s code used in Eshaghi et al., 2018 Annals of Neurology. Function for characterising EDSS progression, based on different rates of change and different baseline EDSS values
is_sustained_progression <- function(edssAtStart, change){
sustainedProgression <- FALSE
#if start of edss is 0, 1.5 increase is considered sustained progression
if ((edssAtStart < 1) & (change >= 1.5)) {
sustainedProgression <- TRUE
}
#if start of edss is 6 or above, 0.5 increase is considered sustained progression
else if ((edssAtStart >= 6) & (change >= 0.5 )) {
sustainedProgression <- TRUE
}
#if start of edss is more than zero but less than 6, sustained progression is by 1 increase in edss
else if ((edssAtStart >= 1 ) & (edssAtStart < 6 ) & (change >= 1 )) {
sustainedProgression <- TRUE
}
return(sustainedProgression)
}
Determine change in EDSS from baseline to last follow-up
Select latest EDSS per subject in subjects with 2 or more assessments
y1 <- df %>%
filter(!subtype == "control") %>%
filter(!is.na(EDSSbaseline)) %>%
group_by(PatientID) %>%
top_n(1, interval) %>%
ungroup() %>%
dplyr::rename(latest_EDSS = EDSSatScan) %>%
dplyr::select(PatientID, interval, EDSSbaseline, latest_EDSS) %>%
filter(!is.na(latest_EDSS)) %>%
mutate(EDSSchange = latest_EDSS - EDSSbaseline)
# apply Arman's function
y1$EDSS_progression <- mapply(is_sustained_progression, y1$EDSSbaseline, y1$EDSSchange)
## get baseline brain-PAD and brain volumetric measures
y2 <- df %>%
filter(!subtype == "control") %>%
filter(interval == 0) %>%
filter(!is.na(EDSSbaseline)) %>%
dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
dplyr::rename(GM_vol_baseline = GM_vol) %>%
dplyr::rename(WM_vol_baseline = WM_vol) %>%
dplyr::rename(CSF_vol_baseline = CSF_vol) %>%
dplyr::rename(WBV_baseline = WBV) %>%
dplyr::rename(ICV_baseline = ICV) %>%
dplyr::select(-one_of('interval'))
y3 <- right_join(y1, y2, by = c("PatientID")) %>%
filter(!is.na(latest_EDSS))
Numbers of EDSS progressors
The number of MS patients with >= 2 EDSS scores was 1143.
table(y3$EDSS_progression) # calculate proportion of patients who progress
##
## FALSE TRUE
## 840 303
round(prop.table(table(y3$EDSS_progression)),3) # calculate percentage of patients who progress
##
## FALSE TRUE
## 0.73 0.27
Run survival analysis
Using brain-PAD with normal covariates
# creating new response function
S <- Surv(time = y3$interval, event = y3$EDSS_progression)
# Brain-PAD, age, sex model
surv.model <- coxph(S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,2) + gender + field_strength, data = y3)
summary(surv.model)
## Call:
## coxph(formula = S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,
## 2) + gender + field_strength, data = y3)
##
## n= 1143, number of events= 303
##
## coef exp(coef) se(coef) z Pr(>|z|)
## BrainPAD_baseline 2.30e-02 1.02e+00 5.80e-03 3.97 7.3e-05
## poly(age_at_baseline_scan1, 2)1 1.11e+01 6.33e+04 2.12e+00 5.22 1.8e-07
## poly(age_at_baseline_scan1, 2)2 8.87e-01 2.43e+00 2.07e+00 0.43 0.669
## gendermale 2.32e-01 1.26e+00 1.20e-01 1.93 0.054
## field_strength3T 3.47e-01 1.42e+00 1.70e-01 2.04 0.042
##
## BrainPAD_baseline ***
## poly(age_at_baseline_scan1, 2)1 ***
## poly(age_at_baseline_scan1, 2)2
## gendermale .
## field_strength3T *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BrainPAD_baseline 1.02 9.77e-01 1.0117 1.03e+00
## poly(age_at_baseline_scan1, 2)1 63298.14 1.58e-05 998.7908 4.01e+06
## poly(age_at_baseline_scan1, 2)2 2.43 4.12e-01 0.0418 1.41e+02
## gendermale 1.26 7.93e-01 0.9961 1.60e+00
## field_strength3T 1.42 7.07e-01 1.0132 1.98e+00
##
## Concordance= 0.648 (se = 0.017 )
## Rsquare= 0.051 (max possible= 0.953 )
## Likelihood ratio test= 60.2 on 5 df, p=1e-11
## Wald test = 62.8 on 5 df, p=3e-12
## Score (logrank) test = 65.5 on 5 df, p=9e-13
# Check the assumptions of proportional hazards are met
cox.zph(surv.model)
## rho chisq p
## BrainPAD_baseline 0.145349 7.54e+00 0.006035
## poly(age_at_baseline_scan1, 2)1 -0.068122 1.44e+00 0.229532
## poly(age_at_baseline_scan1, 2)2 0.166316 7.78e+00 0.005279
## gendermale -0.000737 1.67e-04 0.989698
## field_strength3T -0.163475 9.14e+00 0.002502
## GLOBAL NA 2.22e+01 0.000485
Check that the brain-PAD line is horizontal.
plot(cox.zph(surv.model)[1])
The hazard ratio for brain-PAD on time-to-disease-progression was HR (95% CI) = 1.02, 1.01, 1.03. That means for every additional +1 year of brain-PAD there is a 1.02% increase in the likelihood of EDSS progression. Extrapolated over 5 years of brain-PAD, there is a 1.12 increase in the likelihood of EDSS progression.
Adding normalised brain volume as a covariate
# creating new response function
S <- Surv(time = y3$interval, event = y3$EDSS_progression)
# Brain-PAD, age, sex model
surv.model <- coxph(S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,2) + gender + field_strength + wb_vol_ratio_icv, data = y3)
summary(surv.model)
## Call:
## coxph(formula = S ~ BrainPAD_baseline + poly(age_at_baseline_scan1,
## 2) + gender + field_strength + wb_vol_ratio_icv, data = y3)
##
## n= 1143, number of events= 303
##
## coef exp(coef) se(coef) z
## BrainPAD_baseline -4.81e-03 9.95e-01 7.83e-03 -0.61
## poly(age_at_baseline_scan1, 2)1 2.10e+00 8.14e+00 2.71e+00 0.77
## poly(age_at_baseline_scan1, 2)2 -2.02e-01 8.17e-01 2.09e+00 -0.10
## gendermale 4.87e-02 1.05e+00 1.25e-01 0.39
## field_strength3T 3.86e-01 1.47e+00 1.71e-01 2.25
## wb_vol_ratio_icv -9.42e+00 8.14e-05 1.73e+00 -5.45
## Pr(>|z|)
## BrainPAD_baseline 0.539
## poly(age_at_baseline_scan1, 2)1 0.438
## poly(age_at_baseline_scan1, 2)2 0.923
## gendermale 0.697
## field_strength3T 0.024 *
## wb_vol_ratio_icv 5.1e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## BrainPAD_baseline 9.95e-01 1.00e+00 9.80e-01 1.01e+00
## poly(age_at_baseline_scan1, 2)1 8.14e+00 1.23e-01 4.04e-02 1.64e+03
## poly(age_at_baseline_scan1, 2)2 8.17e-01 1.22e+00 1.37e-02 4.88e+01
## gendermale 1.05e+00 9.52e-01 8.22e-01 1.34e+00
## field_strength3T 1.47e+00 6.80e-01 1.05e+00 2.06e+00
## wb_vol_ratio_icv 8.14e-05 1.23e+04 2.75e-06 2.41e-03
##
## Concordance= 0.661 (se = 0.018 )
## Rsquare= 0.074 (max possible= 0.953 )
## Likelihood ratio test= 87.9 on 6 df, p=<2e-16
## Wald test = 96.8 on 6 df, p=<2e-16
## Score (logrank) test = 98.8 on 6 df, p=<2e-16
# Check the assumptions of proportional hazards are met
cox.zph(surv.model)
## rho chisq p
## BrainPAD_baseline 0.0804 2.224 0.135892
## poly(age_at_baseline_scan1, 2)1 -0.0893 2.830 0.092543
## poly(age_at_baseline_scan1, 2)2 0.1581 7.318 0.006827
## gendermale -0.0284 0.268 0.604561
## field_strength3T -0.1616 8.835 0.002955
## wb_vol_ratio_icv -0.0612 1.328 0.249135
## GLOBAL NA 24.602 0.000405
Time-to-EDSS progression Kaplan-Meier plots
Run survplot on survival object
Based on a median split of brain-PAD.
km.plot.df <- y3 %>% dplyr::mutate(split_BrainPAD = ntile(BrainPAD_baseline, 2))
S <- Surv(time = km.plot.df$interval, event = km.plot.df$EDSS_progression) # response function
sv.object <- survfit(S ~ split_BrainPAD, data = km.plot.df)
# survplot <- ggsurvplot(sv.object,
# ggtheme = theme_cowplot(font_size = 5, line_size = 0.2, rel_large = 1),
# risk.table = F,
# risk.table.fontsize = 1.2,
# cumcensor = F,
# conf.int = T,
# palette = c("blue", "red2"),
# linetype = c(1,3),
# size = 0.5,
# censor = F,
# xlab = "Time (years)",
# legend = c(0.7,0.85),
# legend.labs = c("Brain-PAD<median", "Brain-PAD>median"))
# survplot$plot <- survplot$plot + theme(legend.key.size = unit(1, "line"))
# survplot
# # ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/KM_brain-PAD_plot.pdf", height = 8, width = 8, print(survplot, newpage = FALSE), useDingbats = FALSE)
# ggsave(filename = "~/Work/Articles/Brain age/MS/figures/Fig5_KM_brain-PAD.tif", height = 50, width = 80, print(survplot, newpage = FALSE), device = "tiff", dpi = 300, units = "mm")
The median value = 9.68 years.
Longitudinal brain-age analysis
The total number of people with two or more scans was n = 1266. This included n = 1155 MS patients and n = 111 controls.
With 3 or more scans, there were n = 509 MS patients and n = 64 controls.
Determine change in brain-PAD from baseline to last follow-up
## select latest brain-PAD per subject in subjects with 2 or more assessments
z1 <- df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
top_n(1, interval) %>%
ungroup() %>%
dplyr::rename(latest_BrainPAD = BrainPAD) %>%
dplyr::rename(latest_wb_vol_ratio_icv = wb_vol_ratio_icv) %>%
dplyr::select(PatientID, interval, latest_BrainPAD, latest_wb_vol_ratio_icv)
## baseline brain-PAD
z2 <- df %>%
filter(NoScans >= 2) %>%
filter(interval == 0) %>%
filter(!is.na(BrainPAD)) %>%
dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
#dplyr::rename(GM_vol_baseline = GM_vol) %>%
#dplyr::rename(WM_vol_baseline = WM_vol) %>%
dplyr::rename(wb_vol_baseline = wb_vol_ratio_icv) %>%
dplyr::select(-one_of('interval'))
## calculate change in brain-PAD between baseline and latest brain-PAD
z3 <- right_join(z1, z2, by = c("PatientID")) %>%
mutate(BrainPAD_change = latest_BrainPAD - BrainPAD_baseline) %>%
mutate(wb_vol_ratio_icv_change = latest_wb_vol_ratio_icv - wb_vol_baseline)
Mean annualised rates of change in brain-PAD per group
z3 %>%
dplyr::group_by(subtype) %>%
dplyr::summarise(n = n(), mean = mean(BrainPAD_change/interval, na.rm = T), SD = sd(BrainPAD_change/interval, na.rm = T), median = median(BrainPAD_change/interval, na.rm = T), min = min(BrainPAD_change/interval, na.rm = T), max = max(BrainPAD_change/interval, na.rm = T)) %>%
kable(digits = 2) %>%
kable_styling(full_width = F, position = "left")
|
subtype
|
n
|
mean
|
SD
|
median
|
min
|
max
|
|
control
|
111
|
0.03
|
2.0
|
-0.09
|
-4.7
|
9.9
|
|
CIS
|
279
|
0.23
|
2.2
|
0.12
|
-11.7
|
11.5
|
|
RRMS
|
653
|
0.29
|
1.7
|
0.17
|
-12.2
|
16.2
|
|
SPMS
|
104
|
-0.29
|
1.6
|
-0.24
|
-6.1
|
7.0
|
|
PPMS
|
119
|
0.40
|
1.5
|
0.30
|
-4.7
|
4.2
|
Determine change in EDSS from baseline to last follow-up
## select latest EDSS per subject in subjects with 2 or more assessments
a1 <- df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
top_n(1, interval) %>%
ungroup() %>%
dplyr::rename(latest_EDSSatScan = EDSSatScan) %>%
dplyr::select(PatientID, interval, latest_EDSSatScan)
## baseline EDSS
a2 <- df %>%
filter(NoScans >= 2) %>%
filter(interval == 0) %>%
filter(!is.na(EDSSatScan)) %>%
dplyr::rename(EDSSatScan_baseline = EDSSatScan) %>%
dplyr::rename(BrainPAD_baseline = BrainPAD) %>%
#dplyr::rename(GM_vol_baseline = GM_vol) %>%
#dplyr::rename(WM_vol_baseline = WM_vol) %>%
dplyr::rename(WB_vol_baseline = wb_vol_ratio_icv) %>%
dplyr::select(-one_of('interval'))
## calculate change in brain-PAD between baseline and latest brain-PAD
a3 <- right_join(a1, a2, by = c("PatientID")) %>%
mutate(EDSS_change = latest_EDSSatScan - EDSSatScan_baseline)
Mean annualised rates of change in EDSS per group
a3 %>%
dplyr::group_by(subtype) %>%
dplyr::summarise(n = n(), mean = mean(EDSS_change/interval, na.rm = T), SD = sd(EDSS_change/interval, na.rm = T), median = median(EDSS_change/interval, na.rm = T), min = min(EDSS_change/interval, na.rm = T), max = max(EDSS_change/interval, na.rm = T)) %>%
kable(digits = 2) %>%
kable_styling(full_width = F, position = "left")
|
subtype
|
n
|
mean
|
SD
|
median
|
min
|
max
|
|
CIS
|
257
|
-0.26
|
1.05
|
0.00
|
-6.86
|
3.0
|
|
RRMS
|
649
|
0.12
|
0.45
|
0.00
|
-2.24
|
3.3
|
|
SPMS
|
104
|
0.14
|
0.29
|
0.00
|
-0.64
|
1.3
|
|
PPMS
|
119
|
0.36
|
0.63
|
0.17
|
-1.01
|
3.0
|
Relationship between annualised EDSS change and brain-PAD change
The total number of patients with two or more scans was n = 1155.
delta.df <- join(a3, z3)
with(delta.df, cor.test(EDSS_change, BrainPAD_change, method = "pearson"))
##
## Pearson's product-moment correlation
##
## data: EDSS_change and BrainPAD_change
## t = 9, df = 1000, p-value <2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.21 0.32
## sample estimates:
## cor
## 0.26
with(delta.df[complete.cases(delta.df),], pcor.test(EDSS_change, BrainPAD_change, wb_vol_ratio_icv_change, method = "pearson"))
## estimate p.value statistic n gp Method
## 1 0.081 0.011 2.5 975 1 pearson
Historgams of baseline EDSS scores and EDSS changes
plot1 <- ggplot(df.bl, aes(x = EDSSbaseline)) + geom_histogram(binwidth = 1, fill = "dodgerblue", colour = "black") + xlab("EDSS at baseline") + theme_cowplot()
plot2 <- ggplot(delta.df, aes(x = EDSS_change)) + geom_histogram(binwidth = 1, fill = "dodgerblue", colour = "black") + xlab("EDSS change") + theme_cowplot()
plot3 <- ggplot(subset(df.bl, df.bl$control0rest1 == "MS"), aes(x = BrainPAD)) + geom_histogram(binwidth = 1, fill = "darkgoldenrod2", colour = "black") + xlab("Brain-PAD at baseline") + theme_cowplot()
plot4 <- ggplot(subset(delta.df, delta.df$control0rest1 == "MS"), aes(x = BrainPAD_change)) + geom_histogram(binwidth = 1, fill = "darkgoldenrod2", colour = "black") + xlab("Brain-PAD change") + theme_cowplot()
plot5 <- ggplot(delta.df, aes(x = EDSS_change, y = BrainPAD_change)) + geom_point(aes(colour = subtype)) + geom_smooth(method = "lm", colour = "black") + labs(x = "EDSS change", y = "Brain-PAD change") + theme_cowplot() + scale_color_manual(values = ms.palette[-1], name = "MS subtype")
pg <- cowplot::plot_grid(cowplot::plot_grid(plot1, plot2, plot3, plot4, labels = "AUTO", ncol = 2), plot5, ncol = 2, labels = c("", "E"), rel_widths = c(2,1.5))
pg

cowplot::save_plot("~/Work/Brain ageing/Collaborations/MS/plots/histograms_change_EDSS_brain-PAD_plot.pdf", pg, base_asp = 2.5)
Interaction between subtype and EDSS change
fit.change <- lm(BrainPAD_change ~ EDSS_change * subtype, data = delta.df)
anova(fit.change)
## Analysis of Variance Table
##
## Response: BrainPAD_change
## Df Sum Sq Mean Sq F value Pr(>F)
## EDSS_change 1 1452 1452 82.48 <2e-16 ***
## subtype 3 212 71 4.01 0.0074 **
## EDSS_change:subtype 3 206 69 3.90 0.0087 **
## Residuals 1089 19174 18
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Covarying for change in normalised brain volumes
fit.change2 <- lm(BrainPAD_change ~ EDSS_change * subtype + wb_vol_ratio_icv_change, data = delta.df)
anova(fit.change2)
## Analysis of Variance Table
##
## Response: BrainPAD_change
## Df Sum Sq Mean Sq F value Pr(>F)
## EDSS_change 1 1452 1452 117.03 < 2e-16 ***
## subtype 3 212 71 5.70 0.00072 ***
## wb_vol_ratio_icv_change 1 5769 5769 464.99 < 2e-16 ***
## EDSS_change:subtype 3 111 37 2.98 0.03075 *
## Residuals 1088 13500 12
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Use jtools package to get slopes from the model, per subtype.
sim_slopes(fit.change, pred = "EDSS_change", modx = "subtype", johnson_neyman = F, digits = 4)
## SIMPLE SLOPES ANALYSIS
##
## Slope of EDSS_change when subtype = CIS:
## Est. S.E. t val. p
## 0.8434 0.2189 3.8524 0.0001
##
## Slope of EDSS_change when subtype = RRMS:
## Est. S.E. t val. p
## 1.2534 0.1459 8.5883 0.0000
##
## Slope of EDSS_change when subtype = SPMS:
## Est. S.E. t val. p
## -0.6953 0.6510 -1.0680 0.2857
##
## Slope of EDSS_change when subtype = PPMS:
## Est. S.E. t val. p
## 0.5882 0.3464 1.6983 0.0897
interact_plot(fit.change, pred = "EDSS_change", modx = "subtype", plot.points = T, interval = T, facet.modx = T, point.size = 0.6, line.thickness = 0.5,
x.label = "EDSS annualised change", y.label = "Brain-PAD annualised change", modx.labels = c("CIS", "RRMS", "SPMS", "PPMS")) +
geom_hline(yintercept = 0, lty = 2) + theme_cowplot(font_size = 9) +
scale_fill_manual(values = ms.palette[-1], name = "MS subtype") +
scale_color_manual(values = ms.palette[-1], name = "MS subtype")

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/change_EDSS_brain-PAD_plot.pdf", height = 5, width = 8, useDingbats = FALSE)
ggsave(filename = "~/Work/Articles/Brain age/MS/figures/Fig3_change_EDSS_brain-PAD.tif", height = 60, width = 80, device = "tiff", units = "mm", dpi = 300)
Correlate baseline brain-PAD with the number of follow-up scans completed in the n=104 with >1 scan.
with(subset(df.bl, df.bl$NoScans > 1 & df.bl$subtype == "SPMS"), cor.test(BrainPAD, NoScans, method = "spearman"))
## Warning in cor.test.default(BrainPAD, NoScans, method = "spearman"): Cannot
## compute exact p-value with ties
##
## Spearman's rank correlation rho
##
## data: BrainPAD and NoScans
## S = 2e+05, p-value = 0.003
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## -0.29
Longitudinal brain-predicted age trajectories
Interaction between group and time MS vs. controls
Conditional growth model - random effects of participant and cohort
model_int.group <- lmer(BrainPAD ~ control0rest1 * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = df, control = lmerControl(optimizer = "Nelder_Mead"))
round(anova(model_int.group)["control0rest1:interval",],3)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## control0rest1:interval 27.7 27.7 1 1326 5.84 0.016 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_int.group)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## BrainPAD ~ control0rest1 * interval + poly(age_at_baseline_scan1,
## 2) + gender + field_strength + (interval | PatientID) + (1 |
## Cohort)
## Data: df
## Control: lmerControl(optimizer = "Nelder_Mead")
##
## REML criterion at convergence: 21373
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -9.741 -0.358 -0.014 0.345 8.603
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## PatientID (Intercept) 82.887 9.104
## interval 0.461 0.679 -0.06
## Cohort (Intercept) 15.207 3.900
## Residual 4.738 2.177
## Number of obs: 3564, groups: PatientID, 1354; Cohort, 15
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 0.0513 1.4471 38.4207 0.04
## control0rest1MS 12.1417 1.0279 1258.5379 11.81
## interval -0.1655 0.1496 1374.1462 -1.11
## poly(age_at_baseline_scan1, 2)1 -59.1972 16.3337 1335.7802 -3.62
## poly(age_at_baseline_scan1, 2)2 -33.7926 14.9329 1340.4326 -2.26
## gendermale 1.9018 0.5263 1337.0517 3.61
## field_strength3T -3.2593 0.9971 347.0562 -3.27
## control0rest1MS:interval 0.3722 0.1539 1325.5026 2.42
## Pr(>|t|)
## (Intercept) 0.97191
## control0rest1MS < 2e-16 ***
## interval 0.26873
## poly(age_at_baseline_scan1, 2)1 0.00030 ***
## poly(age_at_baseline_scan1, 2)2 0.02380 *
## gendermale 0.00031 ***
## field_strength3T 0.00119 **
## control0rest1MS:interval 0.01575 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) cn01MS intrvl p(___1,2)1 p(___1,2)2 gndrml fld_3T
## cntrl0rs1MS -0.613
## interval -0.066 0.093
## pl(___1,2)1 0.072 -0.159 -0.001
## pl(___1,2)2 0.026 -0.055 0.003 0.026
## gendermale -0.147 0.027 0.003 0.027 0.059
## fld_strng3T -0.275 0.024 0.002 0.016 -0.027 -0.041
## cntrl0r1MS: 0.063 -0.098 -0.972 0.001 -0.003 -0.003 0.000
Use sjPlots to visualise the interactions, using estimated marginal means
plot_model(model_int.group, type = "emm", terms = c("interval [all]", "control0rest1"), colors = c("darkgreen", "firebrick"), legend.title = "Group", grid = F, show.data = F) +
theme_cowplot() +
labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") +
geom_hline(yintercept = 0, lty = 2)
Generate estimate marginal trends for healthy controls and for all MS/CIS combined, using annualised difference between baseline and final follow-up.
Interaction between group and time MS subtypes
Conditional growth model - random effects of participant and cohort
model_int.subtype <- lmer(BrainPAD ~ subtype * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$subtype != "control"), control = lmerControl(optimizer = "Nelder_Mead"))
round(anova(model_int.subtype)["subtype:interval",],3)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## subtype:interval 78.2 26.1 3 846 4.99 0.002 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Generate EMMs for healthy controls and for MS subtypes, using annualised difference between baseline and final follow-up.
# emtrends(object = model_int.subtype, pairwise ~ subtype, var = "interval")
plot_model(model_int.subtype, type = "emm", terms = c("interval [all]", "subtype"),
legend.title = "Group", show.data = F, grid = F, colors = "bw") +
theme_cowplot() +
labs(x = "Interval from baseline scan (years)", y = "Brain-PAD (years)") +
geom_hline(yintercept = 0, lty = 2)

Longitudinal brain-PAD by interval plots
Randomly sub-sampling 50% of the data.
sub.sample.df <- df %>%
filter(NoScans >= 2) %>%
group_by(PatientID) %>%
sample_frac(0.5) %>%
ungroup()
ggplot(data = sub.sample.df, aes(x = interval, y = BrainPAD, fill = subtype)) +
geom_hline(yintercept = 0, lty = 2) +
geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.25) +
geom_point(aes(colour = subtype), size = 0.5, alpha = 0.5) +
labs(x = "Time (years)", y = "Brain-predicted age difference (years)") +
scale_fill_manual(values = ms.palette) +
scale_color_manual(values = ms.palette) +
# facet_grid(Cohort) +
theme_cowplot() + theme(legend.position = "none")
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/longitudinal_brain-PAD_time_plot.pdf", height = 6, width = 6, useDingbats = FALSE)
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).
ggplot(data = df, aes(x = age_at_scan, y = BrainPAD, fill = subtype)) +
geom_hline(yintercept = 0, lty = 2) +
geom_line(aes(group = PatientID, colour = subtype), alpha = 0.6, linetype = 1, size = 0.2) +
labs(x = "Time (years)", y = "Brain-predicted age difference (years)") +
scale_fill_manual(values = ms.palette) +
scale_color_manual(values = ms.palette) +
facet_wrap(~ subtype, scales = "free_x") +
theme_cowplot() + theme(legend.position = "none")
## Warning: Removed 1 rows containing missing values (geom_path).

Age at diagnosis and longitudinal brain-PAD
model_int.onset <- lmer(BrainPAD ~ disease_onset_age * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$control0rest1 == "MS"), control = lmerControl(optimizer = "Nelder_Mead"))
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
anova(model_int.onset)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## disease_onset_age 937 937 1 1035 175.43 < 2e-16
## interval 38 38 1 696 7.11 0.0079
## poly(age_at_baseline_scan1, 2) 347 173 2 1044 32.46 2.1e-14
## gender 65 65 1 1127 12.18 0.0005
## field_strength 26 26 1 179 4.87 0.0286
## disease_onset_age:interval 6 6 1 693 1.06 0.3042
##
## disease_onset_age ***
## interval **
## poly(age_at_baseline_scan1, 2) ***
## gender ***
## field_strength *
## disease_onset_age:interval
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Time since diagnosis and longitudinal brain-PAD
model_int.duration <- lmer(BrainPAD ~ disease_duration * interval + poly(age_at_baseline_scan1, 2) + gender + field_strength + (interval|PatientID) + (1|Cohort), data = subset(df, df$control0rest1 == "MS"), control = lmerControl(optimizer = "Nelder_Mead"))
## Warning: Some predictor variables are on very different scales: consider
## rescaling
## Warning: Some predictor variables are on very different scales: consider
## rescaling
anova(model_int.duration)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## disease_duration 934 934 1 1014 179.95 < 2e-16
## interval 332 332 1 526 63.90 8.3e-15
## poly(age_at_baseline_scan1, 2) 462 231 2 1076 44.56 < 2e-16
## gender 62 62 1 1085 11.87 0.00059
## field_strength 23 23 1 172 4.52 0.03500
## disease_duration:interval 173 173 1 748 33.27 1.2e-08
##
## disease_duration ***
## interval ***
## poly(age_at_baseline_scan1, 2) ***
## gender ***
## field_strength *
## disease_duration:interval ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
onset.plt <- plot_model(model_int.onset, type = "pred", terms = c("interval [all]", "disease_onset_age[20,30,40,50,60]"), legend.title = "Onset age (years)", show.data = F, grid = F, colors = "bw") +
geom_hline(yintercept = 0, lty = 2, lwd = 0.5) +
labs(x = "Time since baseline (years)", y = "Brain-PAD (years)") +
theme_cowplot(font_size = 8) +
theme(legend.key.size = unit(1.5, "line"))
duration.plt <- plot_model(model_int.duration, type = "pred", terms = c("interval [all]", "disease_duration[0,7.5,15]"), legend.title = "Years since diagnosis", show.data = F, grid = F, colors = "bw") +
geom_hline(yintercept = 0, lty = 2, lwd = 0.5) +
labs(x = "Time since baseline (years)", y = "Brain-PAD (years)") +
theme_cowplot(font_size = 8) +
theme(legend.key.size = unit(1.5, "line"))
cowplot::plot_grid(onset.plt, duration.plt, nrow = 2)

ggsave(filename = "~/Work/Articles/Brain age/MS/figures/Fig4_time_onset.tif", height = 120, width = 80, device = "tiff", units = "mm", dpi = 300)
Plot(s) with age and estimated age for each individual from each group and site
ggplot(data = df, aes(x = age_at_scan, y = BrainAge, fill = subtype)) +
geom_abline(slope = 1, lty = 2) +
geom_line(aes(group = PatientID, colour = subtype), alpha = 0.5, linetype = 1, size = 0.25) +
geom_point(aes(colour = subtype), size = 0.5, alpha = 0.5) +
labs(x = "Age (years)", y = "Brain-predicted age (years)") +
scale_fill_manual(values = ms.palette) +
scale_color_manual(values = ms.palette) +
facet_wrap(~ Cohort) +
theme_cowplot() + theme(legend.position = "none")

ggsave(filename = "~/Work/Brain ageing/Collaborations/MS/plots/longitudinal_brain-age_years_plot.pdf", height = 6, width = 10, useDingbats = FALSE)